# coding: utf-8
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from crawlspidertest.items import ZLItem
from scrapy.exceptions import CloseSpider

class ZLSpider(CrawlSpider):
    """Crawl zhaopin.com search results for iOS jobs in Wuhan and yield one
    ZLItem per listing row.

    The first rule follows/parses search-result pages; the second follows
    individual job-detail pages (currently only logged).
    """

    name = 'zhilian'
    # FIX: Scrapy's off-site middleware reads ``allowed_domains``; the original
    # misspelling ``allow_domains`` was silently ignored, so no domain
    # filtering was actually applied.
    allowed_domains = ['zhaopin.com']
    start_urls = ['http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E6%AD%A6%E6%B1%89&kw=ios&p=1&isadv=0']

    rules = [
        # Search-result pages: extract listings and keep following pagination.
        # Raw strings and escaped dots fix DeprecationWarnings and the
        # unintended "match any char" behaviour of the bare '.'.
        Rule(LinkExtractor(allow=(r'/jobs/searchresult\.ashx\?jl=.+&kw=ios',)),
             callback='mainparse', follow=True),
        # Individual job-detail pages.
        Rule(LinkExtractor(allow=r'jobs\.zhaopin\.com/\d+\.htm'),
             callback='next_page', follow=True),
    ]

    def mainparse(self, response):
        """Parse one search-result page.

        Yields a ZLItem for every listing row that has a job title; stops the
        crawl (CloseSpider) when a page contains no result rows, i.e. we have
        paged past the last result.
        """
        rows = response.xpath('//table[@class="newlist"]//tr[1]')
        self.logger.info('rows=%d url=%s', len(rows), response.url)
        if not rows:
            raise CloseSpider('close it')

        for row in rows:
            # The bold <b> fragment holds the highlighted keyword part of the
            # title; rows without it are header/ad rows and are skipped.
            title_parts = row.xpath('./td[1]//a/b/text()').extract()
            if not title_parts:
                continue
            item = ZLItem()
            # extract_first() (instead of extract()[0]) avoids IndexError
            # when a cell is empty; missing cells become None / ''.
            item['work_name'] = title_parts[0] + (
                row.xpath('./td[1]//a/text()').extract_first() or '')
            item['company'] = row.xpath('./td[3]/a[1]/text()').extract_first()
            item['money'] = row.xpath('./td[4]/text()').extract_first()
            item['address'] = row.xpath('./td[5]/text()').extract_first()
            yield item

    def next_page(self, response):
        """Parse a job-detail page.

        Currently only logs the first intro paragraph; no item is yielded yet.
        """
        intro = response.xpath('//div[@class="tab-inner-cont"]/p[1]/text()').extract()
        self.logger.debug('tgy100 %s %s', response.url, intro)